`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company: 
// Engineer: 
// 
// Create Date: 2021/09/14 14:22:48
// Design Name: 
// Module Name: Get_u42a_v2
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//////////////////////////////////////////////////////////////////////////////////



// -----------------------------------------------------------------------------
// Get_u42a_v2
//
// Streams in samples x (i_real_norm_center, qualified by *_vld, frame end
// marked by *_last) and, once per frame, outputs
//
//        u42a = mean(x^4) / (mean(x^2))^2
//
// where both means are formed by accumulating over the frame and dropping the
// MEAN_SHIFT least-significant bits of the accumulator.
//
// Datapath:
//   1. c2 = x * x                       (mult_myself, 48-bit product)
//   2. c4 = c2[47:24] * c2[47:24]       (mult_myself_unsign)
//   3. c2 is delayed by ALIGN_DLY cycles and accumulated together with c4
//      whenever the c4 multiplier flags a valid result.
//   4. The frame-last flag, delayed by LAST_DLY cycles, is registered once
//      more to produce a one-cycle "sum valid" strobe and the accumulator
//      clear.
//   5. (mean c2)[47:24] is squared, mean c4 is delayed by ALIGN_DLY cycles,
//      and divider_myself computes {mean_c4, 48'b0} / mean_c2_squared.
//   6. DATA_OUT_WIDTH quotient bits starting at bit QUOT_LSB are output.
//
// Fixed-point annotations carried over from the original author:
//   x: signed, 21 fractional bits; c2: 42; c2 cut: 18; c4: 36;
//   u42a_data: 27 fractional bits.
//
// NOTE(review): the multipliers are instantiated with fixed 24-bit operands
// and the slices below are fixed at 24/48 bits, so DATA_IN_WIDTH is only
// meaningful at its default of 24.
// NOTE(review): ALIGN_DLY / LAST_DLY presumably equal one / two multiplier
// latencies; the multiplier, delay and divider modules are defined elsewhere
// - confirm against their implementations.
// NOTE(review): COMPUTE_LEN_WIDTH is not referenced inside this module; it is
// kept so existing instantiations that override it still elaborate.
// -----------------------------------------------------------------------------
module Get_u42a_v2#(
    parameter DATA_IN_WIDTH      = 24,   // input sample width (signed, 21 fractional bits)
    parameter DATA_OUT_WIDTH     = 32,   // output width (27 fractional bits at the default slice)
    parameter COMPUTE_LEN_WIDTH  = 14    // unused here; retained for interface compatibility
)(
    input                        i_clk,
    input                        i_rstn,                   // active-low, sampled synchronously
    input  [DATA_IN_WIDTH-1:0]   i_real_norm_center,       // input sample x
    input                        i_real_norm_center_vld,   // qualifies i_real_norm_center
    input                        i_real_norm_center_last,  // marks the last sample of a frame

    output                       u42a_data_valid,          // divider result-ready strobe
    output [DATA_OUT_WIDTH-1:0]  u42a_data                 // selected quotient bits
);

    // ------------------------------------------------------------------
    // Internal constants (same numeric values the original hard-coded)
    // ------------------------------------------------------------------
    localparam MULT_W     = 24;                    // multiplier operand width
    localparam PROD_W     = 2*MULT_W;              // 48: multiplier product width
    localparam MEAN_SHIFT = 13;                    // accumulator bits dropped to form the mean
                                                   // (presumably 2^13 samples per frame - confirm)
    localparam ACC_W      = PROD_W + MEAN_SHIFT;   // 61: accumulator width
    localparam ALIGN_DLY  = 24;                    // data alignment delay, in clocks
    localparam LAST_DLY   = 2*ALIGN_DLY;           // 48: delay applied to the frame-last flag
    localparam QUOT_W     = 2*PROD_W;              // 96: divider dividend / quotient width
    localparam DIV_N_ACT  = QUOT_W + PROD_W - 1;   // 143: N_ACT parameter of divider_myself
    localparam QUOT_LSB   = 21;                    // lowest quotient bit driven onto u42a_data

    // ------------------------------------------------------------------
    // Stage 1: c2 = x * x
    // ------------------------------------------------------------------
    wire [PROD_W-1:0] i_r_norm_c2;         // 42 fractional bits
    wire              i_r_norm_c2_valid;

    mult_myself #(
        .N (MULT_W),
        .M (MULT_W)
    ) mult_myself1_inst (
        .clk      (i_clk),
        .rstn     (i_rstn),
        .data_rdy (i_real_norm_center_vld),
        .mult1    (i_real_norm_center),
        .mult2    (i_real_norm_center),
        .res_rdy  (i_r_norm_c2_valid),
        .res      (i_r_norm_c2)
    );

    // Upper half of the product feeds the next multiplier (18 fractional bits).
    wire [MULT_W-1:0] i_r_norm_c2_cut = i_r_norm_c2[PROD_W-1:MULT_W];

    // ------------------------------------------------------------------
    // Stage 2: c4 = c2_cut * c2_cut
    // ------------------------------------------------------------------
    wire [PROD_W-1:0] i_r_norm_c4;         // 36 fractional bits
    wire              i_r_norm_c4_valid;

    mult_myself_unsign #(
        .N (MULT_W),
        .M (MULT_W)
    ) mult_myself_unsign1_inst (
        .clk      (i_clk),
        .rstn     (i_rstn),
        .data_rdy (i_r_norm_c2_valid),
        .mult1    (i_r_norm_c2_cut),
        .mult2    (i_r_norm_c2_cut),
        .res_rdy  (i_r_norm_c4_valid),
        .res      (i_r_norm_c4)
    );

    // ------------------------------------------------------------------
    // Alignment delays: frame-last flag and the c2 product
    // ------------------------------------------------------------------
    wire              i_r_norm_c2_last_d48;
    wire [PROD_W-1:0] i_r_norm_c2_d24;

    xdelay_module #(
        .D (LAST_DLY),
        .W (1)
    ) xdelay_module_last1_inst (
        .i_clk  (i_clk),
        .i_rstn (i_rstn),
        .s_i    (i_real_norm_center_last),
        .s_o    (i_r_norm_c2_last_d48)
    );

    xdelay_module #(
        .D (ALIGN_DLY),
        .W (PROD_W)
    ) xdelay_module_data1_inst (
        .i_clk  (i_clk),
        .i_rstn (i_rstn),
        .s_i    (i_r_norm_c2),
        .s_o    (i_r_norm_c2_d24)
    );

    // ------------------------------------------------------------------
    // Frame control
    //
    // Both flags are the delayed frame-last flag registered once more:
    //   sum_r_vld - one-cycle strobe that starts the mean-square multiply
    //   sun_r_rst - accumulator clear; also held high while in reset
    // Because the two flags rise in the same cycle, the multiplier samples
    // the accumulator on the very clock edge that clears it.
    //
    // This replaces a two-state FSM whose next state depended only on
    // i_rstn and whose case statement had no default: with an X state in
    // simulation the old logic pinned sun_r_rst=1 / sum_r_vld=0 until a
    // reset was applied, which synthesized hardware would not do.
    // ------------------------------------------------------------------
    reg  sun_r_rst;
    reg  sum_r_vld;
    wire o_sum_r_vld = sum_r_vld;

    always @(posedge i_clk) begin
        if (~i_rstn) begin
            sun_r_rst <= 1'b1;
            sum_r_vld <= 1'b0;
        end
        else begin
            sun_r_rst <= i_r_norm_c2_last_d48;
            sum_r_vld <= i_r_norm_c2_last_d48;
        end
    end

    // ------------------------------------------------------------------
    // Accumulators for sum(c2) and sum(c4)
    // ------------------------------------------------------------------
    reg [ACC_W-1:0] sum_r_norm_c2;
    reg [ACC_W-1:0] sum_r_norm_c4;

    always @(posedge i_clk) begin
        if (sun_r_rst || ~i_rstn) begin
            sum_r_norm_c2 <= {ACC_W{1'b0}};
            sum_r_norm_c4 <= {ACC_W{1'b0}};
        end
        else if (i_r_norm_c4_valid) begin
            // c2 is taken from the delayed copy so it lines up with c4.
            sum_r_norm_c2 <= sum_r_norm_c2 + i_r_norm_c2_d24;
            sum_r_norm_c4 <= sum_r_norm_c4 + i_r_norm_c4;
        end
        // otherwise: hold
    end

    // Mean = accumulator with the MEAN_SHIFT low bits dropped.
    wire [PROD_W-1:0] sum_r_norm_c2_mean = sum_r_norm_c2[ACC_W-1:MEAN_SHIFT];
    wire [PROD_W-1:0] sum_r_norm_c4_mean = sum_r_norm_c4[ACC_W-1:MEAN_SHIFT];

    // Upper half of mean(c2) feeds the squaring multiplier (18 fractional bits).
    wire [MULT_W-1:0] sum_r_norm_c2_mean_cut = sum_r_norm_c2_mean[PROD_W-1:MULT_W];

    // ------------------------------------------------------------------
    // Stage 3: (mean c2)^2, with mean c4 delayed alongside it
    // ------------------------------------------------------------------
    wire [PROD_W-1:0] sum_r_norm_c2_mean_square;        // 36 fractional bits
    wire              sum_r_norm_c2_mean_square_valid;
    wire [PROD_W-1:0] sum_r_norm_c4_mean_d24;

    mult_myself_unsign #(
        .N (MULT_W),
        .M (MULT_W)
    ) mult_myself_unsign2_inst (
        .clk      (i_clk),
        .rstn     (i_rstn),
        .data_rdy (o_sum_r_vld),
        .mult1    (sum_r_norm_c2_mean_cut),
        .mult2    (sum_r_norm_c2_mean_cut),
        .res_rdy  (sum_r_norm_c2_mean_square_valid),
        .res      (sum_r_norm_c2_mean_square)
    );

    xdelay_module #(
        .D (ALIGN_DLY),
        .W (PROD_W)
    ) xdelay_module_data2_inst (
        .i_clk  (i_clk),
        .i_rstn (i_rstn),
        .s_i    (sum_r_norm_c4_mean),
        .s_o    (sum_r_norm_c4_mean_d24)
    );

    // ------------------------------------------------------------------
    // Stage 4: quotient = {mean c4, 48'b0} / (mean c2)^2
    // NOTE(review): behaviour for a zero divisor (e.g. an all-zero frame)
    // depends on divider_myself, which is not visible here.
    // ------------------------------------------------------------------
    wire [QUOT_W-1:0] o_u42a_data;

    divider_myself #(
        .N     (QUOT_W),
        .M     (PROD_W),
        .N_ACT (DIV_N_ACT)
    ) divider_myself1_inst (
        .clk       (i_clk),
        .rstn      (i_rstn),
        .data_rdy  (sum_r_norm_c2_mean_square_valid),
        .dividend  ({sum_r_norm_c4_mean_d24, {PROD_W{1'b0}}}),
        .divisor   (sum_r_norm_c2_mean_square),
        .res_rdy   (u42a_data_valid),
        .merchant  (o_u42a_data),
        .remainder ()                                   // remainder is not used
    );

    // Equals o_u42a_data[52:21] at the default DATA_OUT_WIDTH of 32
    // (27 fractional bits). The indexed part-select keeps the slice width
    // tied to the port; valid for DATA_OUT_WIDTH <= QUOT_W - QUOT_LSB.
    assign u42a_data = o_u42a_data[QUOT_LSB +: DATA_OUT_WIDTH];

endmodule